The Chinook database is a sample database for the SQL server, Oracle, MySQL, Postgres etc. It is ideal for demos, practicing and familiarzing oneself with relational database management. The Chinook data model represents a digital media retailer. It includes tables for music artists, music tracks, invoices, and customers.
Below is the entity relationship diagram of the Chinook data model.
# import SQLite and pandas
import sqlite3
import pandas as pd
# plotting library
import plotly.express as px
# connect to database
sql_connect = sqlite3.connect('chinook.db')
# cursor function to assist with executing queries
cursor = sql_connect.cursor()
# Save SQL query as a string, where sqlite_master is the table
query = "SELECT * FROM sqlite_master LIMIT 5;"
# Execute query using cursor object
results = cursor.execute(query).fetchall()
# Run, output will be similar to a pandas dataframe
pd.read_sql_query(query, sql_connect)
The 'column' tbl_name contains the list of available tables in the database
query = """
SELECT * FROM artists
LIMIT 5
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM playlist_track
"""
results = cursor.execute(query).fetchall()
x = pd.read_sql_query(query, sql_connect)
x.head()
# Apply value_counts() function to query results
x.Country.value_counts()
query = """
SELECT name, type FROM sqlite_master
WHERE type IN('table', 'view')
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM genres
LIMIT 5
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM tracks
LIMIT 1
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM artists
LIMIT 1
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM invoice_items
LIMIT 1
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM invoices
LIMIT 5
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
query = """
SELECT * FROM invoices
LIMIT 5
"""
results = cursor.execute(query).fetchall()
pd.read_sql_query(query, sql_connect)
Query the Chinook database to answer the following questions:
q1 = """
WITH t1 AS
(SELECT * FROM invoice_items im
LEFT JOIN invoices iv ON iv.InvoiceId = im.InvoiceId
LEFT JOIN customers cs ON cs.CustomerId = im.InvoiceId)
SELECT gr.Name Genre,
SUM(t1.Quantity) Sales
FROM t1
t1 LEFT JOIN tracks tr ON tr.TrackId = t1.TrackId
LEFT JOIN genres gr ON gr.GenreId = tr.GenreId
GROUP BY Genre
ORDER BY Sales DESC
Limit 5;
"""
results = cursor.execute(q1).fetchall()
qp = pd.read_sql_query(q1, sql_connect)
fig = px.bar(qp,
x = 'Genre',
y = 'Sales',
color = 'Genre',
title = 'Top 5 best selling genres')
fig.show()
# rename table column name for joins
q = """
ALTER TABLE invoices RENAME BillingCountry TO Country
"""
# Genre sales by country
q2 = """ WITH t1 AS
(SELECT * FROM invoice_items im
LEFT JOIN invoices iv ON iv.InvoiceId = im.InvoiceId
LEFT JOIN customers cs ON cs.CustomerId = iv.CustomerId)
SELECT gr.Name Genre, cs.Country Country,
SUM(t1.Quantity) Sales
FROM t1
t1 LEFT JOIN tracks tr ON tr.TrackId = t1.TrackId
LEFT JOIN genres gr ON gr.GenreID = tr.GenreId
LEFT JOIN customers cs ON cs.Country = t1.Country
GROUP BY Genre
ORDER BY Sales DESC
"""
results = cursor.execute(q2).fetchall()
qp = pd.read_sql_query(q2, sql_connect)
fig = px.bar(qp,
x = 'Country',
y = 'Sales',
color = 'Genre',
title = 'Best selling genre in each country')
fig.show()
# Genre sales by country
q3 = """ WITH t1 AS
(SELECT * FROM invoice_items im
LEFT JOIN invoices iv ON iv.InvoiceId = im.InvoiceId
LEFT JOIN customers cs ON cs.CustomerId = iv.CustomerId)
SELECT gr.Name Genre, cs.Country Country,
SUM(t1.Quantity) Sales
FROM t1
t1 LEFT JOIN tracks tr ON tr.TrackId = t1.TrackId
LEFT JOIN genres gr ON gr.GenreID = tr.GenreId
LEFT JOIN customers cs ON cs.Country = t1.Country
GROUP BY Genre
ORDER BY t1.Country ;
"""
results = cursor.execute(q3).fetchall()
qp = pd.read_sql_query(q3, sql_connect)
fig = px.bar(qp,
x = 'Genre',
y = 'Sales',
color = 'Country',
title = 'Genre sale by country')
fig.show()
q4 = """
with t1 AS(
SELECT * FROM invoice_items im
LEFT JOIN invoices iv ON iv.InvoiceId = im.InvoiceId
LEFT JOIN customers cs ON cs.CustomerId = iv.CustomerId
)
SELECT cs.Country Country,
SUM(t1.Quantity) Sales
FROM t1
LEFT JOIN customers cs ON cs.Country = t1.Country
GROUP BY t1.Country
ORDER BY Sales DESC;
"""
results = cursor.execute(q4).fetchall()
qp = pd.read_sql_query(q4, sql_connect)
fig = px.bar(qp,
x = 'Country',
y = 'Sales',
color = 'Country',
title = 'Sale by country')
fig.show()
# average sales
q5 = """
WITH t1 AS (
SELECT CASE WHEN
(
SELECT COUNT(*) from customers
WHERE cs.Country = Country
) = 1 THEN "Other"
ELSE cs.Country END "Country", cs.CustomerId,
iv.*
FROM invoices iv
INNER JOIN customers cs on iv.CustomerId = cs.CustomerId
)
SELECT Country, total_sales, total_customers, avg_sales, avg_order_value
FROM (
SELECT Country,
SUM(total) total_sales,
COUNT(distinct CustomerId) total_customers,
SUM(total) / COUNT(distinct CustomerId) avg_sales,
SUM(total) / COUNT(distinct InvoiceId) avg_order_value
FROM t1
GROUP BY Country
ORDER BY total_sales DESC
)
"""
results = cursor.execute(q5).fetchall()
qp = pd.read_sql_query(q5, sql_connect)
fig = px.bar(qp,
x = 'Country',
y = 'total_sales',
color = 'Country',
title = 'Total sales by country')
fig.show()
query = """
SELECT * FROM playlist_track
"""
results = cursor.execute(query).fetchall()
x = pd.read_sql_query(query, sql_connect)
x.head()
q6 = """
SELECT at.Name 'artist',
COUNT(p.PlaylistId) 'count',
p.Name 'playlist',
gn.Name 'genre'
FROM playlist_track pl
LEFT JOIN tracks tr ON tr.TrackId = tr.TrackId
LEFT JOIN albums al ON al.AlbumId = tr.AlbumId
LEFT JOIN artists at ON at.ArtistId = al.ArtistId
LEFT JOIN genres gn ON tr.GenreId = gn.GenreId
LEFT JOIN playlists p ON p.PlaylistId = pl.PlaylistId
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10
"""
results = cursor.execute(q6).fetchall()
qp = pd.read_sql_query(q6, sql_connect)
qp
fig = px.bar(qp,
x = 'artist',
y = 'count',
color = 'genre',
title = 'Artist popularity in playlists')
fig.show()